rm(list = ls())
require(mfx)
require(stats4)
require(lmtest)
require(bbmle)
require(msm)
require(nlWaldTest)
require(sandwich)
require(stargazer)
require(mfx)


data2old=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/infoexp2_data/responses.csv", header=TRUE)
data13=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_july25_type13clean.csv", header=TRUE)
data15=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_August12_type15(clean).csv", header=TRUE)



dataframe2old=data.frame(data2old)
dataframe13=data.frame(data13)
dataframe15=data.frame(data15)


#fancy regression for easy clustering by Markus Conrad https://www.r-bloggers.com/2021/05/clustered-standard-errors-with-r/
ols <- function(form, data, robust=FALSE, cluster=NULL,digits=3){
  r1 <- lm(form, data)
  if(length(cluster)!=0){
    data <- na.omit(data[,c(colnames(r1$model),cluster)])
    r1 <- lm(form, data)
  }
  X <- model.matrix(r1)
  n <- dim(X)[1]
  k <- dim(X)[2]
  if(robust==FALSE & length(cluster)==0){
    se <- sqrt(diag(solve(crossprod(X)) * as.numeric(crossprod(resid(r1))/(n-k))))
    res <- cbind(coef(r1),se)
  }
  if(robust==TRUE){
    u <- matrix(resid(r1))
    meat1 <- t(X) %*% diag(diag(crossprod(t(u)))) %*% X
    dfc <- n/(n-k)    
    se <- sqrt(dfc*diag(solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X))))
    vcov <- solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X))
    res <- cbind(coef(r1),se)
    }
  if(length(cluster)!=0){
    clus <- cbind(X,data[,cluster],resid(r1))
    colnames(clus)[(dim(clus)[2]-1):dim(clus)[2]] <- c(cluster,"resid")
    m <- dim(table(clus[,cluster]))
    dfc <- (m/(m-1))*((n-1)/(n-k))
    uclust  <- apply(resid(r1)*X,2, function(x) tapply(x, clus[,cluster], sum))
    se <- sqrt(diag(solve(crossprod(X)) %*% (t(uclust) %*% uclust) %*% solve(crossprod(X)))*dfc)   
    vcov <- solve(crossprod(X)) %*% (t(uclust) %*% uclust) %*% solve(crossprod(X))
    res <- cbind(coef(r1),se)
    }
  res <- cbind(res,res[,1]/res[,2],(1-pnorm(abs(res[,1]/res[,2])))*2)
  res1 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res)),nrow=dim(res)[1])
  rownames(res1) <- rownames(res)
  colnames(res1) <- c("Estimate","Std. Error","t value","Pr(>|t|)")
  out=list(res1,vcov)
  return(out)
}




priors=c(0.5,0.6,0.75,0.85)

expframe=rbind(dataframe13,dataframe15,dataframe2old)
expframe=subset(expframe,uid>200)

#note that player 234 did not finish and so has no real data
expframe=subset(expframe,uid!=234)


#definitions
expframe$priorA=(expframe$qid==1)*0.5+(expframe$qid==5)*0.6+(expframe$qid==6)*0.75+(expframe$qid==7)*0.85
expframe$priorB=1-expframe$priorA
expframe$Aresponse=(expframe$response==6)
expframe$Bresponse=(expframe$response==7)
expframe$stateA=(expframe$state==4)
expframe$stateB=(expframe$state==10)
expframe$correct=expframe$stateA*expframe$Aresponse+expframe$stateB*expframe$Bresponse
uidlist=unique(expframe$uid)

#find N
#**
length(uidlist)
#**

#Number of reworks
N=1000
#Significant violations
sigviol=rep(0,4)


for(k in 1:N){
#Check for hypothetical baserate behavior
probAgiven1vec=vector('numeric')
probAgiven2vec=vector('numeric')
for(i in 1:length(uidlist)){
playframe=subset(expframe,uid==uidlist[i]&priorA==0.5)
probAgiven1vec[i]=sum(playframe$Aresponse*playframe$stateA)/sum(playframe$stateA)
probAgiven2vec[i]=sum(playframe$Aresponse*(1-playframe$stateA))/sum(1-playframe$stateA)
}

#resimulate data
fakeframe=data.frame()
for(i in 1:length(uidlist)){
playframe=subset(expframe,uid==uidlist[i])
for(j in 1:length(playframe$uid)){
if(playframe$stateA[j]==1){
playframe$Aresponse[j]=rbinom(1,size=1,prob=probAgiven1vec[i])
}else{
playframe$Aresponse[j]=rbinom(1,size=1,prob=probAgiven2vec[i])
}

}
fakeframe=rbind(fakeframe,playframe)
}


#Begin constructing first table
pAgiven2=vector('numeric')
restrictionpAgiven1=vector('numeric')
pAgiven1=vector('numeric')
prob=vector('numeric')

#Start building the first table
#for 0.5 prior
frame50=subset(fakeframe,priorA==0.5)
output50=ols(Aresponse~stateA,data=frame50,robust=TRUE,cluster="uid")
model50=output50[[1]]
vcov50=output50[[2]][1:2,1:2]
pAgiven2[1]=model50[1]
restrictionpAgiven1[1]=((2*priors[1]-1)/priors[1])+pAgiven2[1]*((1-priors[1])/priors[1])
pAgiven1[1]=model50[1]+model50[2]
#inter=nlWaldtest(texts="b[1]+b[2]-((2*x-1)/x)+b[1]*((1-x)/x)",coeff=model50[1:2],Vcov=vcov50,x=priors[1])
#prob[1]=as.numeric(inter$p.value)
sterr=as.numeric(deltamethod(~x1+x2-((2*0.5-1)/0.5)+x1*((1-0.5)/0.5),model50[1:2],vcov50))
tstat=abs(pAgiven1[1]-restrictionpAgiven1[1])/sterr
prob[1]=1-pnorm(tstat)
if(prob[1]<0.05&pAgiven1[1]<restrictionpAgiven1[1]){
sigviol[1]=sigviol[1]+1
}


frame60=subset(fakeframe,priorA==0.6)
output60=ols(Aresponse~stateA,data=frame60,robust=TRUE,cluster="uid")
model60=output60[[1]]
vcov60=output60[[2]][1:2,1:2]
pAgiven2[2]=model60[1]
restrictionpAgiven1[2]=((2*priors[2]-1)/priors[2])+pAgiven2[2]*((1-priors[2])/priors[2])
pAgiven1[2]=model60[1]+model60[2]
#inter=nlWaldtest(texts="b[1]+b[2]-((2*x-1)/x)+b[1]*((1-x)/x)",coeff=model60[1:2],Vcov=vcov60,x=priors[2])
#prob[2]=as.numeric(inter$p.value)
sterr=as.numeric(deltamethod(~x1+x2-((2*0.5-1)/0.5)+x1*((1-0.5)/0.5),model60[1:2],vcov60))
tstat=abs(pAgiven1[2]-restrictionpAgiven1[2])/sterr
prob[2]=1-pnorm(tstat)
if(prob[2]<0.05&pAgiven1[2]<restrictionpAgiven1[2]){
sigviol[2]=sigviol[2]+1
}



frame75=subset(fakeframe,priorA==0.75)
output75=ols(Aresponse~stateA,data=frame75,robust=TRUE,cluster="uid")
model75=output75[[1]]
vcov75=output75[[2]][1:2,1:2]
pAgiven2[3]=model75[1]
restrictionpAgiven1[3]=((2*priors[3]-1)/priors[3])+pAgiven2[3]*((1-priors[3])/priors[3])
pAgiven1[3]=model75[1]+model75[2]
#inter=nlWaldtest(texts="b[1]+b[2]-((2*x-1)/x)+b[1]*((1-x)/x)",coeff=model75[1:2],Vcov=vcov75,x=priors[3])
#prob[3]=as.numeric(inter$p.value)
sterr=as.numeric(deltamethod(~x1+x2-((2*0.5-1)/0.5)+x1*((1-0.5)/0.5),model75[1:2],vcov75))
tstat=abs(pAgiven1[3]-restrictionpAgiven1[3])/sterr
prob[3]=1-pnorm(tstat)
if(prob[3]<0.05&pAgiven1[3]<restrictionpAgiven1[3]){
sigviol[3]=sigviol[3]+1
}



frame85=subset(fakeframe,priorA==0.85)
output85=ols(Aresponse~stateA,data=frame85,robust=TRUE,cluster="uid")
model85=output85[[1]]
vcov85=output85[[2]][1:2,1:2]
pAgiven2[4]=model85[1]
restrictionpAgiven1[4]=((2*priors[4]-1)/priors[4])+pAgiven2[4]*((1-priors[4])/priors[4])
pAgiven1[4]=model85[1]+model85[2]
#inter=nlWaldtest(texts="b[1]+b[2]-((2*x-1)/x)+b[1]*((1-x)/x)",coeff=model85[1:2],Vcov=vcov85,x=priors[4])
#prob[4]=as.numeric(inter$p.value)
sterr=as.numeric(deltamethod(~x1+x2-((2*0.5-1)/0.5)+x1*((1-0.5)/0.5),model85[1:2],vcov85))
tstat=abs(pAgiven1[4]-restrictionpAgiven1[4])/sterr
prob[4]=1-pnorm(tstat)
if(prob[4]<0.05&pAgiven1[4]<restrictionpAgiven1[4]){
sigviol[4]=sigviol[4]+1
}
print(k)
}
sigviol=sigviol/N
sigviol


